In [ ]:
# Course: Applied_Data_Science_with_Python (11 classes)
# Numpy
# Pandas
# Matplotlib
# Seaborn
# Maths and Statistics Fundamentals
# Probability Distribution
# Advanced Statistics
# Data Cleaning/Wrangling
# Feature Engineering
# Projects
In [ ]:
# Journey to Data Science
# CRISP DM Framework

image.png

In [ ]:
 

Numpy¶

In [ ]:
# Numpy
# Numpy is the backbone of Machine Learning and Data Science in Python.
# It is one of the most important libraries in Python for numerical computing.
# Num - Numerical, Py - Python
# I know lists, why Numpy?
# Numpy is a library that provides support for large, multi-dimensional arrays and matrices, along with a collection of 
# mathematical functions to operate on these arrays.
# It is a powerful library that is used for scientific computing in Python.
# It is super fast and efficient as it is implemented in C and Fortran.

# Numpy creates Arrays.
# Arrays are collections of items of the same type.
In [4]:
# Function to find factorial of a number
# This function is in Python language
def factorial(n):
    """Return n! (the product 1 * 2 * ... * n) for a non-negative integer n.

    Parameters:
        n: a non-negative whole number (0! and 1! are both 1).

    Returns:
        The factorial of n.

    Raises:
        ValueError: if n is negative or not a whole number. A recursive
            implementation would never reach its base case for such inputs.
    """
    if n < 0 or n != int(n):
        raise ValueError("factorial() is only defined for non-negative integers")
    # Iterate instead of recursing so large n cannot hit Python's recursion limit.
    result = 1
    while n > 1:
        result *= n
        n -= 1
    return result
# Test the function
print(factorial(5))  # Output: 120

# However, Numpy is implemented in C and Fortran, so it is much faster than Python.
120
In [5]:
# Let us start with Numpy.

# import the numpy library
import numpy as np

# Print the version of numpy
print(np.__version__)
1.26.4
In [8]:
# Let us create a 1D array
# Syntax: np.array(data, dtype)
# data - array-like object
# dtype - data type of the array. It is optional. If not provided, numpy will try to infer the data type from the data.

arr = np.array([1,3,5,2,8])
print(arr)
print(type(arr))

# Indexing - same as lists
print(arr[0])  # 1
print(arr[1])  # 3
print(arr[-1])  # 8
print(arr[-2])  # 2

# Slicing - same as lists
print(arr[0:3])  # SI=0, EI=(3-1)=2
[1 3 5 2 8]
<class 'numpy.ndarray'>
1
3
8
2
[1 3 5]
In [ ]:
# Let us create a 1D array
# Syntax: np.array(data, dtype)

arr = np.array([1,3,5,2,8])
print(arr)
print(type(arr))
print(arr.dtype)
# int64: 64 bit integer
# int32: 32 bit integer
[1 3 5 2 8]
<class 'numpy.ndarray'>
int64
In [10]:
# Let us create a 1D array
# Syntax: np.array(data, dtype)

arr = np.array([1,3,5, 2.5, 8])
print(arr)
print(type(arr))
print(arr.dtype)
[1.  3.  5.  2.5 8. ]
<class 'numpy.ndarray'>
float64
In [11]:
# Let us create a 1D array
# Syntax: np.array(data, dtype)

arr = np.array([1,3,5,2,8], dtype='float')
print(arr)
print(type(arr))
print(arr.dtype)
[1. 3. 5. 2. 8.]
<class 'numpy.ndarray'>
float64
In [12]:
# Let us create a 1D array
# Syntax: np.array(data, dtype)

arr = np.array([1,3,5,2,8], dtype='float64')
print(arr)
print(type(arr))
print(arr.dtype)
[1. 3. 5. 2. 8.]
<class 'numpy.ndarray'>
float64
In [13]:
# Let us create a 1D array
# Syntax: np.array(data, dtype)

arr = np.array([1,3,5,2,8], dtype='int')
print(arr)
print(type(arr))
print(arr.dtype)
[1 3 5 2 8]
<class 'numpy.ndarray'>
int64
In [14]:
# Forcing an integer dtype on data that contains a float
# Syntax: np.array(data, dtype)

# With dtype='int' the fractional part of 5.22 is dropped: the printed array shows it stored as 5
arr = np.array([1,3, 5.22, 2,8], dtype='int')
print(arr)
print(type(arr))
print(arr.dtype)
[1 3 5 2 8]
<class 'numpy.ndarray'>
int64
In [20]:
# Let us create a 1D array
# Syntax: np.array(data, dtype)

arr = np.array([1,3,5,2,8])
print(arr)
print(type(arr))
print(arr.shape)  
[1 3 5 2 8]
<class 'numpy.ndarray'>
(5,)
In [16]:
# Let us create a 2D array
arr = np.array([[10, 20, 30, 40],[50, 60, 70, 80]])
print(arr)
print(type(arr))
[[10 20 30 40]
 [50 60 70 80]]
<class 'numpy.ndarray'>
In [22]:
# Let us create a 2D array
arr = np.array([
    [10, 20, 30, 40],
    [50, 60, 70, 80]
])
print(arr)
print(type(arr))
# dtype - data type of the array
# shape - shape of the array i.e. number of rows and columns
# dtype and shape are attributes of the array (not methods)
print(arr.dtype)
print(arr.shape)  

# Indexing
print(arr[0][1])
# or
print(arr[0, 1]) 
[[10 20 30 40]
 [50 60 70 80]]
<class 'numpy.ndarray'>
int64
(2, 4)
20
20

image.png

In [ ]:
# Let us create a 3D array
arr = np.array([
    [
        [10, 20, 30],
        [40, 50, 60]
    ],
    [
        [70, 80, 90],
        [100, 110, 120]
    ]
])
print(arr)
print(type(arr))
print(arr.shape)  # (2, 2, 3) - 2 arrays, each of 2 rows and 3 columns
print(arr[0][1][2])  # 60
# or
print(arr[0, 1, 2])  # 60
print(arr[1][0][2])  # 90
[[[ 10  20  30]
  [ 40  50  60]]

 [[ 70  80  90]
  [100 110 120]]]
<class 'numpy.ndarray'>
(2, 2, 3)
60
60
90

image.png

In [30]:
arr = np.array([101, 20, -30, 40, 50])
print(arr)
print(np.argmin(arr))  # 2 - index of the smallest value (-30)
print(np.argmax(arr))  # 0 - index of the largest value (101)
print(np.argsort(arr))  # [2 1 3 4 0] - indices that would put the array in ascending order
[101  20 -30  40  50]
2
0
[2 1 3 4 0]

image.png

In [31]:
# Aggregations and other common functions on a 1D integer array
# Syntax: np.array(data, dtype)
# https://numpy.org/devdocs/user/quickstart.html

arr = np.array([10, 20, 30, 40, 50])
print(arr)
print(type(arr))
print(arr.dtype)
# Basic functions of numpy - aggregate/statistical functions plus cumulative, sorting and index helpers
print(np.sum(arr)) # 150
print(np.min(arr)) # 10
print(np.max(arr)) # 50
print(np.mean(arr)) # 30.0
print(np.median(arr)) # 30.0
print(np.std(arr)) # 14.142135623730951
print(np.var(arr)) # 200.0
print(np.prod(arr)) # 12000000 (10 * 20 * 30 * 40 * 50)
print(np.cumsum(arr)) # [ 10  30  60 100 150]
print(np.cumprod(arr)) # [ 10 200 6000 240000 12000000]
print(np.diff(arr)) # [10 10 10 10]
print(np.nonzero(arr)) # (array([0, 1, 2, 3, 4]),)
print(np.sort(arr)) # [10 20 30 40 50]
print(np.argsort(arr)) # [0 1 2 3 4]
print(np.argmin(arr)) # 0
print(np.argmax(arr)) # 4
[10 20 30 40 50]
<class 'numpy.ndarray'>
int64
150
10
50
30.0
30.0
14.142135623730951
200.0
12000000
[ 10  30  60 100 150]
[      10      200     6000   240000 12000000]
[10 10 10 10]
(array([0, 1, 2, 3, 4]),)
[10 20 30 40 50]
[0 1 2 3 4]
0
4
In [32]:
# Statistics on an array that mixes ints with one float (the whole array becomes float64)
# Syntax: np.array(data, dtype)

arr = np.array([1, 3, 5, 2.2, 8])
print(arr)
print(type(arr))
print(arr.dtype)
# Basic functions of numpy - aggregate/statistical functions
print(np.sum(arr)) # 19.2
print(np.min(arr)) # 1.0
print(np.max(arr)) # 8.0
print(np.mean(arr)) # 3.84
print(np.median(arr)) # 3.0
print(np.std(arr)) # 2.4540578640284747
print(np.var(arr)) # 6.0224
[1.  3.  5.  2.2 8. ]
<class 'numpy.ndarray'>
float64
19.2
1.0
8.0
3.84
3.0
2.4540578640284747
6.0224
In [33]:
# Booleans mixed with numbers are stored as floats: True is printed as 1. and False as 0.
# Syntax: np.array(data, dtype)

arr = np.array([1, 3, 5, 2.2, 8, True, False])
print(arr)
print(type(arr))
print(arr.dtype)

# Re-create the array without the booleans; the statistics below are computed on this 5-element array
# Syntax: np.array(data, dtype)

arr = np.array([1, 3, 5, 2.2, 8])
print(arr)
print(type(arr))
print(arr.dtype)
# Basic functions of numpy - aggregate/statistical functions
print(np.sum(arr)) # 19.2
print(np.min(arr)) # 1.0
print(np.max(arr)) # 8.0
print(np.mean(arr)) # 3.84
print(np.median(arr)) # 3.0
print(np.std(arr)) # 2.4540578640284747
print(np.var(arr)) # 6.0224
[1.  3.  5.  2.2 8.  1.  0. ]
<class 'numpy.ndarray'>
float64
[1.  3.  5.  2.2 8. ]
<class 'numpy.ndarray'>
float64
19.2
1.0
8.0
3.84
3.0
2.4540578640284747
6.0224
In [37]:
# Statistics on an array that includes booleans: True and False take part as 1.0 and 0.0
# Syntax: np.array(data, dtype)

arr = np.array([1, 3, 5, 2.2, 8, True, False])
print(arr)
print(type(arr))
print(arr.dtype)

# Basic functions of numpy - aggregate/statistical functions
print(np.sum(arr)) # 20.2
print(np.min(arr)) # 0.0 (this is the False entry)
print(np.max(arr)) # 8.0
print(np.mean(arr)) # 2.8857142857142857
print(np.median(arr)) # 2.2
print(np.std(arr)) # 2.5787198216105884
print(np.var(arr)) # 6.649795918367346
[1.  3.  5.  2.2 8.  1.  0. ]
<class 'numpy.ndarray'>
float64
20.2
0.0
8.0
2.8857142857142857
2.2
2.5787198216105884
6.649795918367346
In [ ]:
# Adding a single string makes NumPy store every element as text (dtype <U32)
# Syntax: np.array(data, dtype)

arr = np.array([1, 3, 5, 2.2, 8, True, False, "Darshan"])
print(arr)
print(type(arr))
print(arr.dtype)

# Basic functions of numpy - intentionally commented out for this array:
# np.sum() on a <U32 array raises UFuncTypeError (ufunc 'add' has no matching loop for strings),
# so the numeric aggregates below cannot be used until the data is converted to a numeric dtype.
# print(np.sum(arr))
# print(np.min(arr))
# print(np.max(arr))
# print(np.mean(arr))
# print(np.median(arr))
# print(np.std(arr))
# print(np.var(arr))
['1' '3' '5' '2.2' '8' 'True' 'False' 'Darshan']
<class 'numpy.ndarray'>
<U32
---------------------------------------------------------------------------
UFuncTypeError                            Traceback (most recent call last)
Cell In[39], line 10
      7 print(arr.dtype)
      9 # Basic functions of numpy - arithmetic functions
---> 10 print(np.sum(arr)) # 150
     11 print(np.min(arr)) # 10
     12 print(np.max(arr)) # 50

File /opt/anaconda3/lib/python3.11/site-packages/numpy/core/fromnumeric.py:2313, in sum(a, axis, dtype, out, keepdims, initial, where)
   2310         return out
   2311     return res
-> 2313 return _wrapreduction(a, np.add, 'sum', axis, dtype, out, keepdims=keepdims,
   2314                       initial=initial, where=where)

File /opt/anaconda3/lib/python3.11/site-packages/numpy/core/fromnumeric.py:88, in _wrapreduction(obj, ufunc, method, axis, dtype, out, **kwargs)
     85         else:
     86             return reduction(axis=axis, out=out, **passkwargs)
---> 88 return ufunc.reduce(obj, axis, dtype, out, **passkwargs)

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U32'), dtype('<U32')) -> None

image.png

In [40]:
arr = np.zeros(5)
print(arr)
[0. 0. 0. 0. 0.]
In [41]:
arr = np.zeros(12)
print(arr)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
In [42]:
arr = np.zeros(5, dtype='int')
print(arr)
[0 0 0 0 0]
In [45]:
arr = np.zeros([3,5])
print(arr)
print(type(arr))
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
<class 'numpy.ndarray'>
In [46]:
arr = np.zeros((3,5))
print(arr)
print(type(arr))
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
<class 'numpy.ndarray'>
In [47]:
arr = np.zeros([3,5], dtype='int')
print(arr)
[[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
In [48]:
arr = np.ones([3,5], dtype='int')
print(arr)
[[1 1 1 1 1]
 [1 1 1 1 1]
 [1 1 1 1 1]]
In [50]:
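# np.twos() does not exist (NumPy only ships np.zeros() and np.ones()), so the call below would fail.
# arr = np.twos([3,5], dtype='int')
# print(arr)
# To fill an array with any other constant use np.full(), e.g. np.full([3,5], 2, dtype='int')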
In [53]:
# Identity matrix
arr = np.identity(3)
print(arr)
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
In [54]:
# Identity matrix
arr = np.identity(3, dtype='int')
print(arr)
[[1 0 0]
 [0 1 0]
 [0 0 1]]
In [55]:
# eye - eye is a function that creates an identity matrix
arr = np.eye(3)
print(arr)
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
In [56]:
# eye - eye is a function that creates an identity matrix
arr = np.eye(3, dtype='int')
print(arr)
[[1 0 0]
 [0 1 0]
 [0 0 1]]
In [57]:
# eye - eye is a function that creates an identity matrix
arr = np.eye(5, dtype='int')
print(arr)
[[1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]]
In [58]:
# eye - eye is a function that creates an identity matrix
arr = np.eye(5, dtype='int', k=0)
print(arr)
[[1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]]
In [59]:
# eye - k=1 puts the ones on the first diagonal above the main diagonal, so the result is no longer an identity matrix
arr = np.eye(5, dtype='int', k=1)
print(arr)
[[0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]
 [0 0 0 0 0]]
In [60]:
# eye - eye is a function that creates an identity matrix
arr = np.eye(5, dtype='int', k=2)
print(arr)
[[0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]
 [0 0 0 0 0]
 [0 0 0 0 0]]
In [61]:
# eye - eye is a function that creates an identity matrix
arr = np.eye(5, dtype='int', k=3)
print(arr)
[[0 0 0 1 0]
 [0 0 0 0 1]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
In [62]:
# eye - eye is a function that creates an identity matrix
arr = np.eye(5, dtype='int', k=4)
print(arr)
[[0 0 0 0 1]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
In [63]:
# eye - eye is a function that creates an identity matrix
arr = np.eye(5, dtype='int', k=5)
print(arr)
[[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
In [64]:
# eye - eye is a function that creates an identity matrix
arr = np.eye(5, dtype='int', k=6)
print(arr)
[[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
In [65]:
# eye - eye is a function that creates an identity matrix
arr = np.eye(5, dtype='int', k=-1)
print(arr)
[[0 0 0 0 0]
 [1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]]
In [66]:
# eye - eye is a function that creates an identity matrix
arr = np.eye(5, dtype='int', k=-2)
print(arr)
[[0 0 0 0 0]
 [0 0 0 0 0]
 [1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]]

image.png

In [ ]:
# rand() - uniformly distributed random floats in the half-open interval [0, 1)
arr = np.random.rand(5)
print(arr)
# randn() - samples from the standard normal distribution (mean 0, standard deviation 1);
# values are NOT limited to [-1, 1] (the sample output contains -1.0378...)
arr = np.random.randn(5)
print(arr)
# randint() - random integers from low (inclusive) up to high (exclusive), so here 1..9
arr = np.random.randint(1, 10, 5)
print(arr)
# random() - uniformly distributed random floats in [0, 1), like rand() but taking the size as one argument
# NOTE: no seed is set, so every run prints different numbers
arr = np.random.random(5)
print(arr)
[0.36892978 0.27683991 0.49056933 0.40019273 0.47214066]
[-0.4856613   0.34246612 -0.32958639  0.95861498 -1.03784037]
[7 2 6 9 1]
[0.79597352 0.10756547 0.91163019 0.52145023 0.79782897]

image-2.png

In [103]:
arr = np.array([
    [11, 22, 33],
    [44, 55, 66],
    [77, 88, 99]
])
print(arr)
print()

X = arr[ : , :-1 ]  # all rows, all columns except last column
print(X)

print()
Y = arr[ : , -1 ]   # all rows, last column
print(Y)
[[11 22 33]
 [44 55 66]
 [77 88 99]]

[[11 22]
 [44 55]
 [77 88]]

[33 66 99]
In [104]:
# range()
for i in range(5):
    print(i, end=' ')
0 1 2 3 4 
In [107]:
# arange()

arr = np.arange(5)
print(arr)
print(type(arr))
[0 1 2 3 4]
<class 'numpy.ndarray'>
In [108]:
np.arange(12)
Out[108]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
In [109]:
np.arange(1, 12)
Out[109]:
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
In [110]:
np.arange(1,12,2)
Out[110]:
array([ 1,  3,  5,  7,  9, 11])
In [112]:
np.arange(12)
Out[112]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
In [111]:
np.arange(12).reshape(3,4)
Out[111]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
In [113]:
np.arange(12).reshape(4,3)
Out[113]:
array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])
In [114]:
np.arange(12).reshape(6,2)
Out[114]:
array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11]])
In [115]:
np.arange(12).reshape(2,6)
Out[115]:
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])
In [117]:
np.arange(12)
Out[117]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
In [116]:
np.arange(12).reshape(1,12)
Out[116]:
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11]])
In [118]:
np.arange(12).reshape(3,2,2)
Out[118]:
array([[[ 0,  1],
        [ 2,  3]],

       [[ 4,  5],
        [ 6,  7]],

       [[ 8,  9],
        [10, 11]]])
In [119]:
np.arange(12).reshape(2,2,3)
Out[119]:
array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]]])
In [121]:
np.arange(12).reshape(3,2,2,1)
Out[121]:
array([[[[ 0],
         [ 1]],

        [[ 2],
         [ 3]]],


       [[[ 4],
         [ 5]],

        [[ 6],
         [ 7]]],


       [[[ 8],
         [ 9]],

        [[10],
         [11]]]])
In [124]:
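# Advanced (fancy) indexing: a list of row indices plus a list of column indices picks the elements at (0,1), (1,0) and (2,0)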
arr = np.array([
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8]
])
print(arr)
print()
print(arr[[0,1,2], [1,0,0]])
[[0 1 2]
 [3 4 5]
 [6 7 8]]

[1 3 6]

image.png

In [125]:
# Broadcasting
# Broadcasting is a powerful mechanism that allows numpy to work with arrays of different shapes when performing arithmetic operations.
# It is a way of performing operations on arrays of different shapes.

arr = np.arange(15).reshape(3, 5)
print(arr)
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]

image.png

In [126]:
arr + 5
Out[126]:
array([[ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])
In [127]:
arr - 2
Out[127]:
array([[-2, -1,  0,  1,  2],
       [ 3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12]])
In [128]:
arr * 2
Out[128]:
array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28]])
In [129]:
arr
Out[129]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
In [130]:
arr1 = np.arange(15).reshape(5, 3)
arr2 = np.arange(5).reshape(5, 1)
print(arr1)
print()
print(arr2)
arr1 + arr2
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]

[[0]
 [1]
 [2]
 [3]
 [4]]
Out[130]:
array([[ 0,  1,  2],
       [ 4,  5,  6],
       [ 8,  9, 10],
       [12, 13, 14],
       [16, 17, 18]])

image.png

In [134]:
# Linear Algebra
# Linear Algebra is a branch of mathematics that deals with vectors, matrices, and linear transformations.
# It is a fundamental part of machine learning and data science.
# It is used in various algorithms such as linear regression, logistic regression, and support vector machines.
# It is also used in deep learning and neural networks.
# Numpy was initially developed for linear algebra and matrix operations.
In [135]:
# Matrix Multiplication
# Shape rule: A (m x p) times B (p x n) gives C (m x n); the inner dimensions must match.

A = np.array([
    [1, 2, 3],
    [4, 5, 6],
])  # 2 x 3
B = np.array([
    [7, 8],
    [9, 10],
    [11, 12],
])  # 3 x 2

# Three interchangeable spellings of the same product: np.dot, the @ operator and np.matmul
for product in (np.dot(A, B), A @ B, np.matmul(A, B)):
    C = product
    print(C)
[[ 58  64]
 [139 154]]
[[ 58  64]
 [139 154]]
[[ 58  64]
 [139 154]]
In [137]:
# Transpose
A = np.array([[1, 2, 3], [4, 5, 6]])
print(A)
print(A.T)
# or
print(np.transpose(A))
# or
print(A.transpose())
[[1 2 3]
 [4 5 6]]
[[1 4]
 [2 5]
 [3 6]]
[[1 4]
 [2 5]
 [3 6]]
[[1 4]
 [2 5]
 [3 6]]

image.png

In [139]:
# Solve linear equations
# Consider the following system of equations:
# 7x + 5y - 3z = 16
# 3x - 5y + 2z = -8
# 5x + 3y - 7z = 0
# Find the values of x, y, and z.
a = np.array([[7, 5, -3], [3, -5, 2], [5, 3, -7]])
b = np.array([16, -8, 0])
np.linalg.solve(a, b)
Out[139]:
array([1., 3., 2.])
In [140]:
# Determinant of a matrix
# The determinant of a matrix is a scalar value that is a function of the entries of a square matrix.
# https://www.mathsisfun.com/algebra/matrix-determinant.html

A = np.array([[1, 2], [3, 4]])
print(A)
# Mathematically 1*4 - 2*3 = -2; the printed value is -2.0000000000000004 because the result is computed in floating point
print(np.linalg.det(A))  # approximately -2.0
[[1 2]
 [3 4]]
-2.0000000000000004
In [141]:
# Inverse of a matrix
# The inverse of a matrix is a matrix that, when multiplied with the original matrix, gives the identity matrix.
# https://www.mathsisfun.com/algebra/matrix-inverse.html

A = np.array([[1, 2], [3, 4]])
print(A)
print(np.linalg.inv(A))  # [[-2.   1. ], [ 1.5 -0.5]]
[[1 2]
 [3 4]]
[[-2.   1. ]
 [ 1.5 -0.5]]
In [142]:
# Trace of a matrix
# The trace of a matrix is the sum of the diagonal elements of a square matrix.

A = np.array([[1, 2], [3, 4]])
print(A)
print(np.trace(A))  # 5
[[1 2]
 [3 4]]
5
In [143]:
"""
np.linspace() returns evenly spaced numbers over a specified interval. It is commonly used to create a sequence of values, especially when you need a specific number of points between two bounds (start and stop), including both endpoints by default.

start: The starting value of the sequence.

stop: The end value of the sequence.

num: Number of evenly spaced samples to generate (default is 50).

endpoint: If True (default), stop is the last sample. If False, it is not included.

retstep: If True, returns the step size along with the array.

dtype: The data type of the output array.

axis: The axis in the result along which the linspace samples are stored.
"""

# Generate 5 numbers between 0 and 1
values = np.linspace(0, 1, num=5)
print("Linearly spaced values:", values)
Linearly spaced values: [0.   0.25 0.5  0.75 1.  ]
In [144]:
np.linspace(0, 1, num=7)
Out[144]:
array([0.        , 0.16666667, 0.33333333, 0.5       , 0.66666667,
       0.83333333, 1.        ])
In [145]:
"""

np.empty() creates an array without initializing the entries. The array will contain arbitrary, uninitialized values (i.e., whatever happens to be in memory at that time). This is faster than functions like zeros() or ones() because it skips filling in default values.

shape: Shape of the array (e.g., (3, 3)).

dtype: Desired data type (default is float).

order: Memory layout – 'C' for row-major (C-style), 'F' for column-major (Fortran-style).
"""

# Create an uninitialized array of shape (2, 3)
arr = np.empty((2, 3))
print("Uninitialized array:\n", arr)
Uninitialized array:
 [[0.16666667 0.33333333 0.5       ]
 [0.66666667 0.83333333 1.        ]]
In [146]:
"""np.flatten() is a method used on NumPy arrays to return a copy of the array collapsed into one dimension (a 1D array)."""

arr = np.array([
    [1, 2], 
    [3, 4]
    ])
flat_arr = arr.flatten()

print("Original array:\n", arr)
print("Flattened array:", flat_arr)
Original array:
 [[1 2]
 [3 4]]
Flattened array: [1 2 3 4]
In [ ]:
"""np.ravel() returns a flattened 1D view of an array whenever possible, meaning it avoids copying data unless necessary."""
arr = np.array([[1, 2], [3, 4]])
raveled_arr = np.ravel(arr)

print("Original array:\n", arr)
print("Raveled array:", raveled_arr)
# Unlike flatten(), which always returns a copy, ravel() returns a view if possible, making it more memory efficient.
Original array:
 [[1 2]
 [3 4]]
Raveled array: [1 2 3 4]
In [150]:
arr = np.array([[[1, 2], [3, 4]]])  # Shape: (1, 2, 2)
print("Original array:\n", arr)
swapped = np.swapaxes(arr, 0, 2)

print("Original shape:", arr.shape)
print("Swapped shape:", swapped.shape)
print("Swapped array:\n", swapped)
Original array:
 [[[1 2]
  [3 4]]]
Original shape: (1, 2, 2)
Swapped shape: (2, 2, 1)
Swapped array:
 [[[1]
  [3]]

 [[2]
  [4]]]
In [153]:
"""np.hstack() horizontally stacks arrays (along axis 1, i.e., columns). It joins arrays with the same number of rows (first dimension).
tup: A sequence (e.g., list or tuple) of arrays to be stacked"""

a = np.array([1, 2])
b = np.array([3, 4])

result = np.hstack((a, b))
print("Horizontally stacked array:\n", result)

"""np.vstack() vertically stacks arrays (along axis 0, i.e., rows). It joins arrays with the same number of columns (second dimension).
tup: A sequence (e.g., list or tuple) of arrays to be stacked"""

result = np.vstack((a, b))
print("Vertically stacked array:\n", result)
Horizontally stacked array:
 [1 2 3 4]
Vertically stacked array:
 [[1 2]
 [3 4]]
In [154]:
"""
np.where() is a versatile function used to locate indices where a condition is True, or to select values based on a condition.

If only condition is provided, it returns the indices of elements where the condition is True.

If x and y are provided, it returns an array where elements are chosen from x if the condition is True, and from y otherwise (element-wise selection).
"""

arr = np.array([10, 20, 30, 40])
indices = np.where(arr > 25)
print("Indices where arr > 25:", indices)
Indices where arr > 25: (array([2, 3]),)

End of Numpy Module¶

In [ ]:
import time
import numpy as np

# Number of elements added in each benchmark (kept in one place so both runs use the same size)
N = 1_000_000

# List addition
a = list(range(N)) # [0, 1, 2, ..., N-1]
b = list(range(N)) # [0, 1, 2, ..., N-1]

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring short durations;
# time.time() follows the wall clock, which can be adjusted while the code runs.
start = time.perf_counter()
c = [x + y for x, y in zip(a, b)] # [0+0, 1+1, 2+2, ..., (N-1)+(N-1)]
end = time.perf_counter()
print("List time:", end - start)

# NumPy addition
a_np = np.arange(N)
b_np = np.arange(N)

start = time.perf_counter()
c_np = a_np + b_np  # one vectorized operation instead of a Python-level loop
end = time.perf_counter()
print("NumPy time:", end - start)
List time: 0.05669212341308594
NumPy time: 0.0021271705627441406

NumPy Exercise Questions¶

  1. Create a 1D array that contains the ages of 5 employees. Hint: np.array()

  2. Create a NumPy array with the marks of 6 students in Mathematics and print its data type. Hint: Use dtype parameter

  3. Create an array of 5 product prices, ensuring they are stored as float64. Hint: dtype='float64'

  4. Create a NumPy array for sales values [1200, 1500.5, 1300, 1700.25, 1600] and print its data type. Hint: np.array([...])

  5. Convert a list of temperatures [25, 30, 28, 35] into a NumPy array and check if it is of type int. Hint: type() and dtype

  6. Slice a 1D array of quarterly sales data and print values from the 2nd to the 4th quarter. Hint: array slicing arr[start:end]

  7. Access the last two elements from an array of monthly rainfall values. Hint: Negative indexing

  8. Create a 1D NumPy array of the first 10 even numbers. Hint: Use np.arange() with a step

  9. Create an array of 8 zeros representing uninitialized temperature sensors. Hint: np.zeros()

  10. Create an array of 5 ones to represent fully charged batteries. Hint: np.ones()

  11. Create a 2D array to represent marks of 3 students in 4 subjects. Hint: Nested list inside np.array()

  12. Use reshape() to convert a 1D array of 12 values into a 3x4 matrix representing quarterly sales. Hint: arr.reshape(3, 4)

  13. Create a 2D array of size 5x5 filled with 7s using a single function. Hint: np.full()

  14. Create a 3x3 identity matrix representing a transformation matrix in linear algebra. Hint: np.eye()

  15. Generate a matrix of random integers between 1 and 100 of shape (3, 5). Hint: np.random.randint()

  16. Generate a 1D array of 10 random floats between 0 and 1. Hint: np.random.rand()

  17. Find the maximum and minimum values in a NumPy array representing product ratings. Hint: np.max(), np.min()

  18. Find the mean and standard deviation of an array of students' scores. Hint: np.mean(), np.std()

  19. Perform element-wise multiplication between two arrays representing unit price and quantity sold. Hint: the `*` operator

  20. Create a 2D array and extract an entire row using indexing. Hint: arr[row_index]

  21. Extract a specific column from a 2D array of employee attendance (rows: employees, cols: days). Hint: arr[:, column_index]

  22. Create a copy of an array and modify the original. Show that the copied version remains unchanged. Hint: arr.copy()

  23. Sort a NumPy array of employee ages in ascending order. Hint: np.sort()

  24. Check how many values in a NumPy array are greater than 50 using boolean indexing. Hint: arr > 50

  25. Replace all values below 40 in a NumPy array of student scores with the value 40 (grace marks). Hint: Boolean indexing and assignment



Advanced NumPy Tasks¶


Task 26: Normalize a NumPy array of student scores to a 0–1 scale using min-max normalization. Hint: This is crucial in data preprocessing for machine learning. Use the formula:

normalized = (arr - np.min(arr)) / (np.max(arr) - np.min(arr))

Test with an array like: [56, 78, 45, 90, 67].


Task 27: Stack two arrays vertically—one with product IDs and one with their prices. Hint: This helps in data merging or batch processing. Use np.vstack((arr1, arr2)) where:

  • arr1 = np.array([101, 102, 103])
  • arr2 = np.array([250, 300, 275])

Task 28: Use np.where() to flag all sales below the monthly target of 1000. Hint: This is a conditional filtering operation used in feature engineering or flag generation. Example:

np.where(sales_array < 1000, 'Low', 'OK')

Task 29: Use np.unique() to find unique categories and their counts from a customer segment array. Hint: This is widely used in EDA for categorical analysis. Example input: np.array(['Gold', 'Silver', 'Gold', 'Platinum']) Use:

np.unique(arr, return_counts=True)

Task 30: Apply np.cumsum() on a revenue array to get cumulative revenue over time. Hint: Common in time series and business KPIs. Input: np.array([100, 200, 150, 300]) Output: [100, 300, 450, 750]